# Alex Gazmararian
# agazmararian@gmail.com

library(tidyverse)
library(tidylog)
library(readxl)
library(here)
library(modelsummary)
library(tidycensus)

# The raw API download is cached rather than the processed table, so the
# processing step can be re-run without another download
cache_file <- here("data", "cache", "acs2023_raw_download.rds")

# Destination of the processed county-level table
output_file <- here("data", "inter", "acs2023_county_processed.csv")

# ACS variable codes requested from the API. The codes are used verbatim as
# column names once the download is pivoted wide in Step 2.
getvars <- c(
  # Education (B06009): _001 is the table total, _005 and _006 are summed
  # for the college share
  "B06009_001",
  "B06009_005",
  "B06009_006",
  # Broadband
  "B28002_004",
  # Poverty
  "B14006_002", # income below poverty level in last 12 months
                # (downloaded, but not referenced by the Step 2 calculations)
  "B06012_002", # total below 100% of the poverty level
  # Foreign-born population
  "B06012_017",
  # Housing costs: median monthly housing costs
  "B25105_001"
)

# Step 1: Get raw data (from cache or download)
#
# The cache is reused only when it holds every variable listed in `getvars`.
# A cache written before `getvars` was edited would otherwise be accepted
# silently and make Step 2 fail later with an unrelated "object not found"
# error, so a stale cache triggers a fresh download instead.
census <- NULL

if (file.exists(cache_file)) {
  cached <- readRDS(cache_file)
  cached_vars <- if (is.data.frame(cached)) {
    unique(cached[["variable"]])
  } else {
    character(0)
  }
  missing_vars <- setdiff(getvars, cached_vars)

  if (length(missing_vars) == 0) {
    message("[OK] Using cached Census ACS download from data/cache/")
    census <- cached
  } else {
    message(
      "Cached ACS download is missing requested variables (",
      paste(missing_vars, collapse = ", "),
      "); downloading again..."
    )
  }
}

if (is.null(census)) {
  # Need to download from Census API
  message("Downloading Census ACS data from API...")

  # Load Census API key from .Renviron; fail early with setup instructions
  # when it is not set
  api_key <- Sys.getenv("CENSUS_API_KEY")
  if (!nzchar(api_key)) {
    stop("Census API key not found. Please add CENSUS_API_KEY to your .Renviron file.\n",
         "To set up:\n",
         "1. Get a free API key from: https://api.census.gov/data/key_signup.html\n",
         "2. Add this line to your .Renviron file: CENSUS_API_KEY=your_key_here\n",
         "3. Restart R session\n",
         "Or run: usethis::edit_r_environ() to open .Renviron file",
         call. = FALSE)
  }
  census_api_key(api_key)
  message("[OK] Census API key loaded from .Renviron")

  # `survey = "acs5"` is tidycensus's default; it is spelled out so the ACS
  # product does not depend on the package default staying the same
  census <- get_acs(
    geography = "county",
    variables = getvars,
    year = 2023,
    survey = "acs5"
  )

  # Cache the raw download (this also replaces a stale cache file)
  dir.create(dirname(cache_file), recursive = TRUE, showWarnings = FALSE)
  saveRDS(census, cache_file)
  message("[OK] Cached raw ACS download to: ", cache_file)
}

# Step 2: Process the data (always runs, uses cached or fresh download)

# Reshape from one row per GEOID x variable to one row per GEOID, with one
# column of estimates per ACS variable code
g <- census %>%
  mutate(fips = GEOID) %>%
  pivot_wider(id_cols = fips, names_from = variable, values_from = estimate)

# Derived measures.
# NOTE(review): the broadband, poverty and foreign-born shares are all divided
# by B06009_001 (the education table's total), not by the totals of their own
# tables (which are not downloaded). Confirm this denominator is intended.
g <- g %>%
  mutate(
    college_acs = (B06009_005 + B06009_006) / B06009_001,
    bb_acs = B28002_004 / B06009_001,
    poverty_acs = B06012_002 / B06009_001,
    foreign_acs = B06012_017 / B06009_001,
    housing_acs = B25105_001
  ) %>%
  select(fips, college_acs:housing_acs)

# FIPS is stored as a number from here on, so leading zeros are dropped
# (e.g. GEOID "09120" becomes 9120)
g$fips <- as.numeric(g$fips)

# Harmonize FIPS to pre-2023 basis: GEOID 09120 is recoded to 09001.
# The codes are written without the leading zero because R would parse
# `09120` as the number 9120 anyway. Only this one code is remapped; every
# other GEOID is kept as delivered.
g$fips[g$fips == 9120] <- 9001

# Create the output directory if needed so a fresh checkout does not fail on
# write (mirrors the handling of the cache directory in Step 1)
dir.create(dirname(output_file), recursive = TRUE, showWarnings = FALSE)
write_csv(g, output_file)
message("[OK] Saved processed ACS data to: ", output_file)